\documentclass{article}
\usepackage{preamble}
%\usepackage{subfig}
% \dblspace: sets \baselineskip to 0.8cm via \setlength when invoked.
% NOTE(review): no use of \dblspace is visible in this file -- confirm it is still needed.
\newcommand{\dblspace}{\setlength{\baselineskip}{0.8cm}}
% \pskinny{#1}{#2}: typesets p(#1|#2) with \big-sized parentheses.
%   #1: material placed before the bar
%   #2: material placed after the bar
% \bigl/\bigr are used instead of plain \big so the parentheses are typeset as
% opening/closing delimiters; with plain \big they are ordinary atoms and a
% leading sign in #1 would be spaced as a binary operator.
% NOTE(review): \renewcommand implies an earlier definition, presumably in the
% `preamble' package -- confirm.
\renewcommand{\pskinny}[2]{p\bigl(#1|#2\bigr)}
\usepackage{graphicx} % For figures
\usepackage{subfigure} 
\usepackage{natbib}   % For citations
\usepackage{algorithm}
\usepackage{algorithmic}
\usepackage{hyperref}
% \theHalgorithm: expands to the arabic value of the `algorithm' counter.
% NOTE(review): presumably supplies hyperref with an anchor name for algorithm
% floats (the \theH<counter> convention) -- confirm against the hyperref docs.
\newcommand{\theHalgorithm}{\arabic{algorithm}}
\usepackage[normalem]{ulem}  % for strikethrough
\usepackage{color} % for comments to each other
\usepackage{comment}
%\usepackage{pgfplots}
%\usepackage{icml2012}




% \usepackage[accepted]{icml2012}

% The \icmltitle you define below is probably too long as a header.
% Therefore, a short form for the running title is supplied here:
%\icmltitlerunning{Sampling for Bayesian Quadrature}
\title{Appendix: Sampling for Bayesian Quadrature}

\begin{document} 
%\twocolumn[

\section{Covariance of products of functions}
For our later use, consider now the more general integral over the product of two functions, $f$ and $g$,
\[\inty{f g} \deq \int f(\phi)\,g(\phi)\,\po{\phi}\,\ud\phi\,,\]
for which, given independent \gp s (with constant mean and Gaussian covariance) and function observations $\vf_{s} \deq f(\vph_{s})$ and  $\vg_{t} \deq g(\vph_{t})$, we have
% \begin{align} \label{eq:mean_inty_fg}
% \mean{\inty{f g}}{\vf_s,\,\vect{g}_t}
% & 
% =\iint \inty{f g}\,\p{\inty{f g}}{f,\,g}\p{f}{\vf_s}\,\p{g}{\vect{g}_t}\, \ud \inty{f g} \,\ud f \,\ud g                                                                                                                                                                 \nonumber\\
% &
% %  =\iint \inty{f g}\,\dd{\inty{f g}}{\int f(\phi)\,g(\phi)\,\po{\phi}\,\ud\phi}\N{f}{\meancondfn{f}{s}}{\covcondfn{f}{s}}\,\N{g}{\meancondfn{g}{s}}{\covcondfn{g}{s}}\, \ud \inty{f g} \,\ud f\,\ud g \nonumber\\
% % &
%  = 
% %\N{\inty{f}}
% {\dtt{s}{f}}\tra \,\Nt_{f,g}(\vph_s,\vph_t)\,\dtt{t}{g}
% %{\varpi_{f}-\ntT{s}{f} K_{f}(\vph_s,\vph_s)\inv \nt{s}{f}}
% \,,
% \end{align}
% where, for $\phi_i \in \vph_s,\,\phi_j \in \vph_t$,
% \begin{align*} %\label{eq:Nt}
% \Nt_{f,g}(\phi_i,\phi_j) & \deq 
% \int K_f(\phi_i,\phi)\,\po{\phi}K_g(\phi,\phi_j)\, \ud\phi\nonumber\\
% & =h_f^2\, h_g^2\,
% \N{\begin{bmatrix} \phi_i \\ \phi_j \end{bmatrix}}{\begin{bmatrix} \nu_{\phi}\\ \nu_{\phi} \end{bmatrix}}{\begin{bmatrix}  \lambda_\phi+W_f & \lambda_\phi \\ \lambda_\phi & \lambda_\phi+W_g
% \end{bmatrix}}\,.
% \end{align*}
% Again, the scales $h_f$ and $h_g$ cancel out of our mean estimate, in this case, \eqref{eq:mean_inty_fg}.
% 
% Finally, consider the  integral over three independent functions, $f$, $g$ and $h$,
% $$\inty{f g h} \deq \int f(\phi)\,g(\phi)\,h(\phi)\,\po{\phi}\,\ud\phi\,,$$ 
\begin{align*} %\label{eq:mean_inty_fgh}
& \mean{\inty{f g}}{\vf_{s},\,\vg_{t}} \nonumber\\
& 
=
\iint \inty{f\,g}\,\p{\inty{f\,g}}{f,\,g}
% \nonumber\\
% &
%\hspace{3cm}
\p{f}{\vf_s}\p{g}{\vg_t}\ud \inty{f\,g}\ud f\ud g                                                                                                                                                                      \nonumber\\
&
%  =\iint \inty{f g}\,\dd{\inty{f g}}{\int f(\phi)\,g(\phi)\,\po{\phi}\,\ud\phi}\N{f}{\meancondfn{f}{s}}{\covcondfn{f}{s}}\,\N{g}{\meancondfn{g}{s}}{\covcondfn{g}{s}}\, \ud \inty{f g} \,\ud f\,\ud g \nonumber\\
% &
 = 
%\N{\inty{f}}
\mu_f\,\mu_g
+ \mu_f\,\ntT{t}{g}\, \dtt{t}{g}
+ \mu_g\,\ntT{s}{f}\, \dtt{s}{f}
\nonumber\\
&\hspace{3cm}
+ \dtt{{s}}{f}\tra\,\Ot_{f,g}\bigl(\vph_{s},\,\vph_{t}\bigr)\,\dtt{{t}}{g}
%{\varpi_{f}-\ntT{s}{f} K_{f}(\vph_s,\vph_s)\inv \nt{s}{f}}
\,,
\end{align*}
where, for $\phi_i \in \vph_{s}$ and $\phi_j \in \vph_{t}$,
\begin{align*} %\label{eq:Nt}
& \Ot_{f,g}\bigl(\phi_{i},\,\phi_{j}\bigr) 
\deq 
\!\int K_{f}(\phi_i,\phi)\,K_{g}(\phi_j,\phi)\,\po{\phi}\, \ud\phi\nonumber\\
& =h_{f}^2\, h_{g}^2\,
\N{\begin{bmatrix} \phi_i \\ \phi_j \end{bmatrix}}
{\begin{bmatrix} \nu_{\phi}\\ \nu_{\phi}\end{bmatrix}}
{\begin{bmatrix}  
\lambda_\phi+w_{f} & \lambda_\phi \\ 
\lambda_\phi & \lambda_\phi +w_{g}
\end{bmatrix}}\,.
\end{align*}
Here we have overloaded the definition of $\Ot$; its two definitions can be clearly distinguished by the number of arguments with which it is presented. 
Again, the scales $h_f$ and $h_g$ cancel out of our mean estimate.
%, in this case, \eqref{eq:mean_inty_fg}.


\section{Uncertainty about the posterior mean does not change the mean estimate of $\lfn$}
With these assumptions, we have
\begin{align*}
& \mean{\inty{\lfn}}{\psi_0,\tvr_s} \\
& \deq \iint \mean{\psi[\tr]}{\psi_0,\tr}
\p{\tr}{\tvr_s, \theta}\, \p{\theta}{\tvr_s}\ud \tr\,\ud \theta\\
& = \iint \mean{\psi[\tr]}{\psi_0,\tr} \N{\tr}{m_{\tr|s,\theta}}{C_{\tr|s,\theta}}
\\
& \hspace{5cm}\N{\theta}{m_\theta}{C_\theta}\ud \tr\,\ud \theta
\\
& \simeq \iint \mean{\psi[\tr]}{\psi_0,\tr} 
\\
& \hspace{2cm}\N{\tr}
{m_{\tr|s,m_\theta}+\pderiv{m_{\tr|s,\theta}}{\theta}(\theta-m_\theta)}
{C_{\tr|s,m_\theta}}
\\
& \hspace{4cm}
\N{\theta}{m_\theta}{C_\theta}\ud \tr\,\ud \theta
\\
& = \int \mean{\psi[\tr]}{\psi_0,\tr} \\
& \hspace{1cm}\N{\tr}
{m_{\tr|s,m_\theta}}
{C_{\tr|s,m_\theta}+\pderiv{m_{\tr|s,\theta}}{\theta}C_\theta\pderiv{m\tra_{\tr|s,\theta}}{\theta}}
\ud \tr
\\
& = \mean{\psi[\tr]}{\psi_0,m_{\tr|s,m_\theta}}\,,
\end{align*}
which is identical to~\eqref{eq:mean_ev}.

\section{Graphical model of GP}

% As with our convention above, we will take knowledge of
% sample locations $\vlfv_s$ to be implicit within $I$. However, as we don't know $f(\lfv)$ for any $\lfv \not \in \vlfvS$, we are uncertain about the function $f(\cdot)$. As a consequence, we are also uncertain about the value of the integral $\inty{f}$. As such, we possess probability distributions over both $f(\cdot)$ and $\inty{f}$. 
% %The resulting Bayesian network is depicted in Figure \ref{fig:BMC}.

%  \begin{figure}[ht]
% \hspace{-1cm}
% 	\begin{pspicture}(-5,0)(5,4.25)%
% 	%\showgrid
% 	\GM@Inode{0}{3.5}{1}%	
% 	%\rput(I){\rput(0,-2){\GM@node{X}}}   \GM@label[angle=90]{X}{$X$}
% 	\rput(0,2){\GM@detnode{psi}}   \GM@label[angle=-90]{psi}{$\rv{\inty{f}}$}
% 
% % NB \lfv is the actual value of the hyperparameters -- doesn't make any sense to write \lfv_i
% 
% 	\rput(psi){\rput(1.25,-2){\GM@plate[plateLabelPos=bl]{2}{4.2}{$i'\not\in s$}}}
% 	\rput(psi){\rput(2.5,1){\GM@node[observed=true]{phij}}}   \GM@label[angle=90]{phij}{$\rv{\lfv}_{i'}$}
% 	\rput(phij){\rput(0,-2){\GM@detnode{qj}}}   \GM@label[angle=130]{qj}{$\rv{f}_{i'}$}
% 
% 	\rput(psi){\rput(-3.25,-2){\GM@plate[plateLabelPos=br]{2}{4.2}{$i \in s$}}}
% 	\rput(psi){\rput(-2.5,1){\GM@node[observed=true]{phii}}}   \GM@label[angle=90]{phii}{$\rv{\lfv}_i$}
% 	\rput(phii){\rput(0,-2){\GM@detnode[observed=true]{qi}}}   \GM@label[angle=50]{qi}{$\rv{f}_i$}
% 
% 	\pnode(0,0.5){mid}
% 
% 	%\ncline[arrows=->]{phi}{X}
% 
% 
% 	\ncline[arrows=->]{phij}{qj}
% 	\ncline[arrows=->]{qj}{psi}
% 	\ncline[arrows=->]{phij}{psi}
% 
% 	\ncline[arrows=->]{phii}{qi}
% 	\ncline[arrows=->]{qi}{psi}
% 	\ncline[arrows=->]{phii}{psi}
% 
% 	\ncarc{qj}{qi}
% 	\nccircle[angleA=-90]{qj}{0.5}
% 	\nccircle[angleA=90]{qi}{0.5}
% 
% 	\end{pspicture}%
% \caption{Bayesian network for Bayesian Quadrature.}
% \label{fig:BMC}
% \end{figure}


\section{Modeling Likelihood Functions}

\begin{align}
& \mean{\inty{\lfn}}{\psi_0,\tvr_s} \nonumber\\
& \deq \int \mean{\psi[\tr]}{\psi_0,\tr}
\p{\tr}{\tvr_s}\, \ud \tr 
\nonumber\\
& = \mean{\psi[\tr]}{\psi_0,m_{\tr|s}} \nonumber\\
& = \mean{\inty{\lfn}}{\vr_s} + \int \bigl(\mean{r(\lfv)}{\vr_s}+\gamma\bigr)\,\Delta(\lfv)\,\po{\lfv}\ud\lfv
\nonumber\\
& = \mean{\inty{\lfn}}{\vr_s} + \mean{\inty{r \Delta}}{\vr_s} + \gamma\, \mean{\inty{ \Delta}}{\vr_s}\,.
\label{eq:mean_ev}
\end{align}

\end{document} 